# Histopathology analysis with accessible plots:
> # - One color per treatment (color-blind friendly)
> # - Same symbol for all raw points
> # - Mean ± SD drawn in black (distinct from points)
> # - Dunn (Bonferroni) vs Control annotations
> # =========================================================
>
> # ---- Packages ----
> pkgs <- c("tidyverse","rstatix","car","patchwork","effsize","effectsize","viridis","ggpubr")
> to_install <- pkgs[!pkgs %in% installed.packages()[,"Package"]]
> if(length(to_install)) install.packages(to_install, dep=TRUE)
>
> library(tidyverse)
── Attaching core tidyverse packages ──────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.1 ✔ stringr 1.5.2
✔ ggplot2 4.0.0 ✔ tibble 3.3.0
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.1.0
── Conflicts ────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ lubridate::%within%() masks IRanges::%within%()
✖ dplyr::collapse() masks Biostrings::collapse(), IRanges::collapse()
✖ dplyr::combine() masks BiocGenerics::combine()
✖ purrr::compact() masks XVector::compact()
✖ dplyr::desc() masks IRanges::desc()
✖ tidyr::expand() masks S4Vectors::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::first() masks S4Vectors::first()
✖ dplyr::lag() masks stats::lag()
✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
✖ purrr::reduce() masks IRanges::reduce()
✖ dplyr::rename() masks S4Vectors::rename()
✖ lubridate::second() masks S4Vectors::second()
✖ lubridate::second<-() masks S4Vectors::second<-()
✖ dplyr::slice() masks XVector::slice(), IRanges::slice()
ℹ Use the conflicted package to force all conflicts to become errors
Warning messages:
1: package ‘ggplot2’ was built under R version 4.4.1
2: package ‘tibble’ was built under R version 4.4.1
3: package ‘purrr’ was built under R version 4.4.1
4: package ‘stringr’ was built under R version 4.4.1
5: package ‘forcats’ was built under R version 4.4.1
6: package ‘lubridate’ was built under R version 4.4.1
> library(rstatix) # kruskal_test, dunn_test, kruskal_effsize
Attaching package: ‘rstatix’
The following object is masked from ‘package:IRanges’:
desc
The following object is masked from ‘package:stats’:
filter
> library(car) # leveneTest
Loading required package: carData
Attaching package: ‘car’
The following object is masked from ‘package:dplyr’:
recode
The following object is masked from ‘package:purrr’:
some
Warning message:
package ‘car’ was built under R version 4.4.1
> library(patchwork) # plot panels
Warning message:
package ‘patchwork’ was built under R version 4.4.1
> library(effsize) # cliff.delta
> library(effectsize) # eta_squared (optional)
Attaching package: ‘effectsize’
The following objects are masked from ‘package:rstatix’:
cohens_d, eta_squared
Warning message:
package ‘effectsize’ was built under R version 4.4.1
> library(viridis) # color-blind friendly palettes
Loading required package: viridisLite
> library(ggpubr) # stat_pvalue_manual
Warning message:
package ‘ggpubr’ was built under R version 4.4.1
>
> set.seed(123)
>
> # ---- Data ----
> df <- tribble(
+ ~Treatment, ~CaDig, ~Tubule,
+ "Control", 0.18, 41.52,
+ "Control", 0.14, 78.65,
+ "Control", 0.20, 37.60,
+ "Control", 0.20, 52.21,
+ "Control", 0.21, 44.21,
+ "Control", 0.18, 46.34,
+
+ "T1", 0.21, 47.32,
+ "T1", 0.16, 50.25,
+ "T1", 0.21, 62.20,
+ "T1", 0.18, 40.26,
+ "T1", 0.20, 51.63,
+ "T1", 0.22, 42.39,
+
+ "T2", 0.35, 49.90,
+ "T2", 0.29, 63.28,
+ "T2", 0.30, 59.46,
+ "T2", 0.38, 70.84,
+ "T2", 0.40, 51.92,
+ "T2", 0.41, 70.20,
+
+ "T3", 0.50, 65.53,
+ "T3", 0.55, 113.39,
+ "T3", 0.55, 72.45,
+ "T3", 0.50, 75.28,
+ "T3", 0.56, 80.70,
+ "T3", 0.48, 70.56,
+
+ "T4", 0.71, 65.31,
+ "T4", 0.46, 97.86,
+ "T4", 0.60, 95.09,
+ "T4", 0.57, 65.93,
+ "T4", 0.69, 81.72,
+ "T4", 0.54, 95.70,
+
+ "T5", 0.46, 85.30,
+ "T5", 0.60, 62.94,
+ "T5", 0.875, 91.76,
+ "T5", 1.125, 134.12,
+ "T5", 0.50, 115.08,
+ "T5", 0.78, 124.90
+ ) %>%
+ mutate(Treatment = factor(Treatment, levels = c("Control","T1","T2","T3","T4","T5")))
>
> # ---- Assumption checks ----
> norm_tbl <- df %>%
+ pivot_longer(c(CaDig, Tubule), names_to="Metric", values_to="Value") %>%
+ group_by(Metric, Treatment) %>%
+ summarise(n = n(),
+ W = shapiro.test(Value)$statistic,
+ p_shapiro = shapiro.test(Value)$p.value,
+ .groups = "drop")
> print(norm_tbl, n=24)
# A tibble: 12 × 5
Metric Treatment n W p_shapiro
<chr> <fct> <int> <dbl> <dbl>
1 CaDig Control 6 0.871 0.231
2 CaDig T1 6 0.905 0.404
3 CaDig T2 6 0.899 0.366
4 CaDig T3 6 0.852 0.163
5 CaDig T4 6 0.956 0.792
6 CaDig T5 6 0.935 0.620
7 Tubule Control 6 0.797 0.0551
8 Tubule T1 6 0.941 0.668
9 Tubule T2 6 0.908 0.425
10 Tubule T3 6 0.769 0.0301
11 Tubule T4 6 0.820 0.0880
12 Tubule T5 6 0.955 0.784
>
> lev_cadig <- car::leveneTest(CaDig ~ Treatment, data = df, center = median)
> lev_tub <- car::leveneTest(Tubule ~ Treatment, data = df, center = median)
> flg_cadig <- fligner.test(CaDig ~ Treatment, data = df)
> flg_tub <- fligner.test(Tubule ~ Treatment, data = df)
> print(lev_cadig); print(lev_tub); print(flg_cadig); print(flg_tub)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 5 9.1112 2.356e-05 ***
30
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 5 2.1548 0.08593 .
30
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Fligner-Killeen test of homogeneity of variances
data: CaDig by Treatment
Fligner-Killeen:med chi-squared = 22.666, df = 5, p-value = 0.000391
Fligner-Killeen test of homogeneity of variances
data: Tubule by Treatment
Fligner-Killeen:med chi-squared = 10.979, df = 5, p-value = 0.05179
>
> # ---- Descriptive stats ----
> summary_tbl <- df %>%
+ pivot_longer(c(CaDig,Tubule), names_to="Metric", values_to="Value") %>%
+ group_by(Metric, Treatment) %>%
+ summarise(n = n(), mean = mean(Value), sd = sd(Value),
+ median = median(Value), IQR = IQR(Value), .groups="drop")
> print(summary_tbl, n=12)
# A tibble: 12 × 7
Metric Treatment n mean sd median IQR
<chr> <fct> <int> <dbl> <dbl> <dbl> <dbl>
1 CaDig Control 6 0.185 0.0251 0.19 0.0200
2 CaDig T1 6 0.197 0.0225 0.205 0.025
3 CaDig T2 6 0.355 0.0509 0.365 0.0825
4 CaDig T3 6 0.523 0.0339 0.525 0.0500
5 CaDig T4 6 0.595 0.0940 0.585 0.12
6 CaDig T5 6 0.723 0.254 0.69 0.326
7 Tubule Control 6 50.1 14.8 45.3 8.55
8 Tubule T1 6 49.0 7.82 48.8 7.66
9 Tubule T2 6 60.9 8.88 61.4 14.7
10 Tubule T3 6 79.7 17.3 73.9 8.31
11 Tubule T4 6 83.6 15.0 88.4 25.7
12 Tubule T5 6 102. 27.0 103. 35.5
>
> # ---- Kruskal–Wallis + global effect size (eta2[H]) ----
> kw_cadig <- rstatix::kruskal_test(df, CaDig ~ Treatment)
> kw_tub <- rstatix::kruskal_test(df, Tubule ~ Treatment)
> print(kw_cadig); print(kw_tub)
# A tibble: 1 × 6
.y. n statistic df p method
* <chr> <int> <dbl> <int> <dbl> <chr>
1 CaDig 36 30.1 5 0.0000142 Kruskal-Wallis
# A tibble: 1 × 6
.y. n statistic df p method
* <chr> <int> <dbl> <int> <dbl> <chr>
1 Tubule 36 23.5 5 0.000265 Kruskal-Wallis
>
> eff_kw_cadig <- rstatix::kruskal_effsize(df, CaDig ~ Treatment) # eta2[H]
> eff_kw_tub <- rstatix::kruskal_effsize(df, Tubule ~ Treatment)
> print(eff_kw_cadig); print(eff_kw_tub)
# A tibble: 1 × 5
.y. n effsize method magnitude
* <chr> <int> <dbl> <chr> <ord>
1 CaDig 36 0.836 eta2[H] large
# A tibble: 1 × 5
.y. n effsize method magnitude
* <chr> <int> <dbl> <chr> <ord>
1 Tubule 36 0.618 eta2[H] large
>
> # ---- Dunn (Bonferroni), then filter vs Control ----
> dunn_cadig_all <- df %>% rstatix::dunn_test(CaDig ~ Treatment, p.adjust.method = "bonferroni")
> dunn_tub_all <- df %>% rstatix::dunn_test(Tubule ~ Treatment, p.adjust.method = "bonferroni")
>
> keep_vs_control <- function(tbl, control="Control"){
+ tbl %>%
+ dplyr::filter(group1 == control | group2 == control) %>%
+ dplyr::mutate(
+ grp2 = ifelse(group1 == control, group2, group1),
+ group1 = control,
+ group2 = grp2
+ ) %>%
+ dplyr::select(-grp2) %>%
+ dplyr::arrange(group2)
+ }
> dunn_cadig <- keep_vs_control(dunn_cadig_all, "Control")
> dunn_tub <- keep_vs_control(dunn_tub_all, "Control")
>
> p_stars <- function(p) dplyr::case_when(
+ p <= 0.001 ~ "***",
+ p <= 0.01 ~ "**",
+ p <= 0.05 ~ "*",
+ TRUE ~ "ns"
+ )
> dunn_cadig <- dunn_cadig %>% dplyr::mutate(label = p_stars(p.adj))
> dunn_tub <- dunn_tub %>% dplyr::mutate(label = p_stars(p.adj))
>
> # ---- Pairwise effect size: Cliff's delta vs Control ----
> levels_trt <- levels(df$Treatment)
> ref <- "Control"
> cd_cadig <- purrr::map_dfr(levels_trt[-1], ~{
+ x <- df$CaDig[df$Treatment==.x]; y <- df$CaDig[df$Treatment==ref]
+ d <- effsize::cliff.delta(x, y)
+ tibble(Metric="CaDig", group1=ref, group2=.x, cliffs_delta=d$estimate, magnitude=d$magnitude)
+ })
Warning messages:
1: In cliff.delta.default(x, y) :
The samples are fully disjoint, using approximate Confidence Interval estimation
2: In cliff.delta.default(x, y) :
The samples are fully disjoint, using approximate Confidence Interval estimation
3: In cliff.delta.default(x, y) :
The samples are fully disjoint, using approximate Confidence Interval estimation
4: In cliff.delta.default(x, y) :
The samples are fully disjoint, using approximate Confidence Interval estimation
> cd_tub <- purrr::map_dfr(levels_trt[-1], ~{
+ x <- df$Tubule[df$Treatment==.x]; y <- df$Tubule[df$Treatment==ref]
+ d <- effsize::cliff.delta(x, y)
+ tibble(Metric="Tubule", group1=ref, group2=.x, cliffs_delta=d$estimate, magnitude=d$magnitude)
+ })
>
> # ---- Build significance annotations (only p < 0.05) ----
> max_mean_sd <- function(v) {
+ df %>% group_by(Treatment) %>% summarise(m=mean(.data[[v]]), s=sd(.data[[v]]), .groups="drop") %>%
+ summarise(max(m+s)) %>% pull()
+ }
> max_cadig <- max_mean_sd("CaDig")
> max_tub <- max_mean_sd("Tubule")
>
> sig_cadig <- dunn_cadig %>% filter(p.adj < 0.05) %>%
+ mutate(y.position = seq(from = max_cadig*1.08, by = max_cadig*0.06, length.out = n()))
> sig_tub <- dunn_tub %>% filter(p.adj < 0.05) %>%
+ mutate(y.position = seq(from = max_tub*1.08, by = max_tub*0.06, length.out = n()))
>
> # ---- Plotting (one color per treatment, same shape for all points; mean±SD in black) ----
> base_theme <- theme_minimal(base_size = 12) +
+ theme(panel.grid.major.x = element_blank(),
+ axis.text.x = element_text(size = 11),
+ legend.position = "right")
>
> make_plot <- function(data, yvar, ylab, sig_df=NULL){
+ # Summary for mean ± SD
+ sumdf <- data %>%
+ group_by(Treatment) %>%
+ summarise(mean = mean(.data[[yvar]]),
+ sd = sd(.data[[yvar]]),
+ .groups="drop")
+
+ p <- ggplot(data, aes(x = Treatment, y = .data[[yvar]])) +
+ # Raw points: one color per treatment, same symbol (circles)
+ geom_jitter(aes(color = Treatment),
+ width = 0.12, height = 0, alpha = 0.85, size = 2.6) +
+ # Mean ± SD: draw in black (distinct from raw points)
+ geom_errorbar(data = sumdf,
+ aes(x = Treatment, y = mean, ymin = mean - sd, ymax = mean + sd),
+ width = 0.22, size = 0.7, color = "black", inherit.aes = FALSE) +
+ geom_point(data = sumdf, aes(x = Treatment, y = mean),
+ size = 3.2, color = "black", inherit.aes = FALSE) +
+ scale_color_viridis_d(option = "cividis", begin = 0.05, end = 0.95, name = "Treatment") +
+ labs(x = NULL, y = ylab) +
+ base_theme
+
+ if(!is.null(sig_df) && nrow(sig_df) > 0){
+ p <- p + ggpubr::stat_pvalue_manual(
+ sig_df,
+ label = "label",
+ y.position = "y.position",
+ tip.length = 0.01,
+ hide.ns = TRUE,
+ bracket.size = 0.45,
+ size = 3.5,
+ color = "black"
+ )
+ }
+ p
+ }
>
> p1 <- make_plot(df, "CaDig", "Ca/Dig (mean ± SD)", sig_cadig)
Warning message:
Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
> p2 <- make_plot(df, "Tubule","Tubule diameter (µm; mean ± SD)", sig_tub)
>
> panel <- p1 / p2 + plot_annotation(
+ title = "Histopathology indices across treatments (mean ± SD)\nAnnotations: Dunn (Bonferroni) vs Control",
+ theme = theme(plot.title = element_text(face="bold", size=13))
+ )
>
> # ---- Show and export ----
> print(panel)
> ggsave("histopathology_mean_sd_annotated_singleShape.png", panel, width = 9.5, height = 8, dpi = 300)
> ggsave("histopathology_mean_sd_annotated_singleShape.tiff", panel, width = 9.5, height = 8, dpi = 300, compression = "lzw")
> ggsave("histopathology_mean_sd_annotated_singleShape.pdf", panel, width = 9.5, height = 8, device = cairo_pdf)
>
> # ---- Export tables ----
> readr::write_csv(norm_tbl, "normality_shapiro_by_group.csv")
> readr::write_csv(summary_tbl, "summary_mean_sd_median_IQR.csv")
> readr::write_csv(dunn_cadig, "dunn_vs_control_CaDig_bonferroni.csv")
> readr::write_csv(dunn_tub, "dunn_vs_control_Tubule_bonferroni.csv")
> readr::write_csv(cd_cadig, "cliffs_delta_vs_control_CaDig.csv")
> readr::write_csv(cd_tub, "cliffs_delta_vs_control_Tubule.csv")
> capture.output(kw_cadig, file="kw_CaDig.txt")
> capture.output(kw_tub, file="kw_Tubule.txt")
> capture.output(eff_kw_cadig, file="kw_effsize_eta2H_CaDig.txt")
> capture.output(eff_kw_tub, file="kw_effsize_eta2H_Tubule.txt")
>
> cat("\nDone ✅\nAccessible figures generated with:\n",
+ "- One color per treatment (cividis) & same symbol for all raw points.\n",
+ "- Mean ± SD in black, clearly distinguished from points.\n")
Done ✅
Accessible figures generated with:
- One color per treatment (cividis) & same symbol for all raw points.
- Mean ± SD in black, clearly distinguished from points.